library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggfortify)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
#loading in data
kinetics_noise <- read.delim("ln_nas_linked_kinetics_noise.bed")
kinetics_noise <- kinetics_noise %>% filter(Next > 0)
#head(kinetics_noise)
ggplot(kinetics_noise) +
    geom_point(aes(x=bs, y=Nint)) +
    scale_y_log10() +
    scale_x_log10() +
    geom_smooth(aes(x=bs, y=Nint)) +
    theme_classic(base_size = 18) +
    ylab("Intrinsic Noise") +
    xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(kinetics_noise) +
  geom_point(aes(x=k_on, y=Nint)) +
  scale_y_log10() +
  scale_x_log10() +
  geom_smooth(aes(x=k_on, y=Nint))+
  theme_classic(base_size = 18) +
  ylab("Intrinsic Noise") +
  xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(kinetics_noise) +
        geom_point(aes(x=bs, y=Next)) +
        scale_y_log10() +
        scale_x_log10() +
        geom_smooth(aes(x=bs, y=Next))+
        theme_classic(base_size = 18) +
        ylab("Extrinsic Noise") +
        xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(kinetics_noise) +
  geom_point(aes(x=bs, y=Ntot)) +
  scale_y_log10() +
  scale_x_log10() +
  geom_smooth(aes(x=bs, y=Ntot))+
  theme_classic(base_size = 18) +
  ylab("Total Noise") +
  xlab("Burst Frequency")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

mid<-mean(log10(kinetics_noise$Nint))
ggplot(kinetics_noise) +
        geom_point(aes(x=bs, y=k_on, colour = log10(Nint))) +
        scale_x_log10() +
        scale_y_log10() +
        theme_minimal(base_size = 18) +
        scale_color_gradient2(midpoint=mid, low="blue", mid="grey",
                              high="red", space ="Lab" ) +
        ylab("Burst Frequency") +
        xlab("Burst Size")

mid2<-mean(log10(kinetics_noise$Next))
ggplot(kinetics_noise) +
        geom_point(aes(x=bs, y=k_on, colour = log10(Next))) +
        scale_x_log10() +
        scale_y_log10() +
        theme_minimal(base_size = 18) +
        scale_color_gradient2(midpoint=mid2, low="blue", mid="grey",
                              high="red", space ="Lab" ) +
        ylab("Burst Frequency") +
        xlab("Burst Size")

mid3<-mean(log10(kinetics_noise$Ntot))
ggplot(kinetics_noise) +
        geom_point(aes(x=bs, y=k_on, colour = log10(Ntot))) +
        scale_x_log10() +
        scale_y_log10() +
        theme_minimal(base_size = 18) +
        scale_color_gradient2(midpoint=mid, low="blue", mid="grey",
                              high="red", space ="Lab" ) +
        ylab("Burst Frequency") +
        xlab("Burst Size")

library(ggpubr)
t.test(log10(kinetics_noise$Ntot[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Ntot[which(!as.logical(kinetics_noise$anchor))]))
## 
##  Welch Two Sample t-test
## 
## data:  log10(kinetics_noise$Ntot[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Ntot[which(!as.logical(kinetics_noise$anchor))])
## t = -2.3082, df = 319.91, p-value = 0.02163
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.081785149 -0.006518294
## sample estimates:
##   mean of x   mean of y 
## -0.08691602 -0.04276430
ggplot(kinetics_noise, aes(x=factor(anchor), Ntot, fill = anchor)) +
        geom_violin() +
        geom_boxplot(width=0.1) +
        theme_minimal(base_size = 12) +
        ylab( "Total Transcriptional Noise") +
        scale_y_log10() +
        xlab("") +
        scale_x_discrete(labels = NULL) +
        scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
        stat_compare_means(method = "t.test")

t.test(log10(kinetics_noise$Next[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Next[which(!as.logical(kinetics_noise$anchor))]))
## 
##  Welch Two Sample t-test
## 
## data:  log10(kinetics_noise$Next[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Next[which(!as.logical(kinetics_noise$anchor))])
## t = 0.49126, df = 328.63, p-value = 0.6236
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04464586  0.07436634
## sample estimates:
##  mean of x  mean of y 
## -0.7837668 -0.7986271
ggplot(kinetics_noise, aes(x=factor(anchor), Next, fill = anchor)) +
        geom_violin() +
        geom_boxplot(width=0.1) +
        theme_minimal(base_size = 12) +
        ylab( "Extrinsic Transcriptional Noise") +
        scale_y_log10() +
        xlab("") +
        scale_x_discrete(labels = NULL) +
        scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
        stat_compare_means(method = "t.test")

t.test(log10(kinetics_noise$Nint[which(as.logical(kinetics_noise$anchor))]),log10(kinetics_noise$Nint[which(!as.logical(kinetics_noise$anchor))]))
## 
##  Welch Two Sample t-test
## 
## data:  log10(kinetics_noise$Nint[which(as.logical(kinetics_noise$anchor))]) and log10(kinetics_noise$Nint[which(!as.logical(kinetics_noise$anchor))])
## t = -2.7027, df = 307.42, p-value = 0.00726
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.11174353 -0.01758512
## sample estimates:
##  mean of x  mean of y 
## -0.2581415 -0.1934771
ggplot(kinetics_noise, aes(x=factor(anchor), Nint, fill = anchor)) +
        geom_violin() +
        geom_boxplot(width=0.1) +
        theme_minimal(base_size = 12) +
        ylab( "Intrinsic Transcriptional Noise") +
        scale_y_log10() +
        xlab("") +
        scale_x_discrete(labels = NULL) +
        scale_fill_discrete(name = "Enhancer-Promoter \n Contact") +
        stat_compare_means(method = "t.test")

ggplot(kinetics_noise, aes(factor(string.Freq), Next)) +
        geom_violin(scale = "width", linewidth = 1) +
        geom_dotplot(binaxis = "y", binwidth = 0.015,  mapping = aes(alpha = factor(string.Freq)), color = "blue") +
        scale_alpha_manual(values = c(0, seq(from = 0.2, to = 1 , by = 0.9/20))) +
        scale_y_log10() +
        geom_smooth(data = kinetics_noise, mapping= aes(Freq, Next), color = "red") +
        coord_cartesian(xlim = c(0,11)) +
        theme_minimal(base_size = 18) +
        theme(legend.position = "none") +
        ylab("Extrinsic Noise") +
        xlab("Number of Anchored Enhancers")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

ggplot(kinetics_noise, aes(factor(string.Freq), Ntot)) +
        geom_violin(scale = "width", linewidth = 1) +
        geom_dotplot(binaxis = "y", binwidth = 0.015,  mapping = aes(alpha = factor(string.Freq)), color = "blue") +
        scale_alpha_manual(values = c(0, seq(from = 0.2, to = 1 , by = 0.9/20))) +
        scale_y_log10() +
        geom_smooth(data = kinetics_noise, mapping= aes(Freq, Ntot), color = "red") +
        coord_cartesian(xlim = c(0,11)) +
        theme_minimal(base_size = 18) +
        theme(legend.position = "none") +
        ylab("Total Noise") +
        xlab("Number of Anchored Enhancers")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

nint_model <- lm(log10(Nint) ~ Freq * (log10(k_on) + log10(bs)), kinetics_noise)
nint_model_logi <- lm(log10(Nint) ~ anchor + log10(k_on) + log10(bs), kinetics_noise)
resettest(nint_model)
## 
##  RESET test
## 
## data:  nint_model
## RESET = 79.073, df1 = 2, df2 = 2662, p-value < 2.2e-16
autoplot(nint_model)

vif(nint_model)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##             Freq      log10(k_on)        log10(bs) Freq:log10(k_on) 
##         7.265444         1.288812         1.282231         1.888372 
##   Freq:log10(bs) 
##         6.217500
summary(nint_model)
## 
## Call:
## lm(formula = log10(Nint) ~ Freq * (log10(k_on) + log10(bs)), 
##     data = kinetics_noise)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94236 -0.06866 -0.00269  0.06030  1.40088 
## 
## Coefficients:
##                   Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)       0.272525   0.008526   31.966   <2e-16 ***
## Freq             -0.003946   0.007457   -0.529    0.597    
## log10(k_on)      -0.934596   0.009026 -103.539   <2e-16 ***
## log10(bs)        -0.448763   0.010630  -42.218   <2e-16 ***
## Freq:log10(k_on) -0.017707   0.008952   -1.978    0.048 *  
## Freq:log10(bs)    0.015883   0.008136    1.952    0.051 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1537 on 2664 degrees of freedom
## Multiple R-squared:  0.8117, Adjusted R-squared:  0.8113 
## F-statistic:  2296 on 5 and 2664 DF,  p-value: < 2.2e-16
next_model <- lm(log10(Next) ~ (log10(k_on)) + log10(bs) + Freq, kinetics_noise)


vif(next_model)
## log10(k_on)   log10(bs)        Freq 
##    1.218016    1.216608    1.008407
resettest(next_model)
## 
##  RESET test
## 
## data:  next_model
## RESET = 103.07, df1 = 2, df2 = 2664, p-value < 2.2e-16
autoplot(next_model)

summary(next_model)
## 
## Call:
## lm(formula = log10(Next) ~ (log10(k_on)) + log10(bs) + Freq, 
##     data = kinetics_noise)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.75381 -0.18354  0.08278  0.27596  1.48119 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.074698   0.024376 -44.089   <2e-16 ***
## log10(k_on) -0.325383   0.025771 -12.626   <2e-16 ***
## log10(bs)    0.497552   0.030408  16.362   <2e-16 ***
## Freq         0.010101   0.008159   1.238    0.216    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4514 on 2666 degrees of freedom
## Multiple R-squared:  0.2152, Adjusted R-squared:  0.2143 
## F-statistic: 243.7 on 3 and 2666 DF,  p-value: < 2.2e-16
library(lmtest)
ntot_model <- lm(log10(Ntot) ~ log10(k_on) + log10(bs) + Freq , kinetics_noise)
vif(ntot_model)
## log10(k_on)   log10(bs)        Freq 
##    1.218016    1.216608    1.008407
resettest(ntot_model)
## 
##  RESET test
## 
## data:  ntot_model
## RESET = 2.5256, df1 = 2, df2 = 2664, p-value = 0.0802
autoplot(ntot_model)

summary(ntot_model)
## 
## Call:
## lm(formula = log10(Ntot) ~ log10(k_on) + log10(bs) + Freq, data = kinetics_noise)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79064 -0.06640 -0.01308  0.04525  1.09968 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  0.208523   0.006254   33.341   <2e-16 ***
## log10(k_on) -0.807971   0.006612 -122.194   <2e-16 ***
## log10(bs)   -0.161937   0.007802  -20.756   <2e-16 ***
## Freq         0.005389   0.002093    2.574   0.0101 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1158 on 2666 degrees of freedom
## Multiple R-squared:  0.8579, Adjusted R-squared:  0.8578 
## F-statistic:  5367 on 3 and 2666 DF,  p-value: < 2.2e-16

Finding interactions in higher order modeling

ntot_model <- lm(log10(Ntot) ~ (log10(k_on) + log10(bs)) * Freq, kinetics_noise)
vif(ntot_model, type = 'predictor')
## VIFs computed for predictors
## [1] 7.265444 7.265444 7.265444
resettest(ntot_model)
## 
##  RESET test
## 
## data:  ntot_model
## RESET = 2.4585, df1 = 2, df2 = 2662, p-value = 0.08575
autoplot(ntot_model)

summary(ntot_model)
## 
## Call:
## lm(formula = log10(Ntot) ~ (log10(k_on) + log10(bs)) * Freq, 
##     data = kinetics_noise)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79479 -0.06622 -0.01321  0.04500  1.08950 
## 
## Coefficients:
##                    Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)       0.2122184  0.0064166   33.074   <2e-16 ***
## log10(k_on)      -0.8091333  0.0067936 -119.103   <2e-16 ***
## log10(bs)        -0.1668531  0.0080002  -20.856   <2e-16 ***
## Freq             -0.0081820  0.0056126   -1.458    0.145    
## log10(k_on):Freq  0.0001049  0.0067376    0.016    0.988    
## log10(bs):Freq    0.0176400  0.0061233    2.881    0.004 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1157 on 2664 degrees of freedom
## Multiple R-squared:  0.8584, Adjusted R-squared:  0.8581 
## F-statistic:  3230 on 5 and 2664 DF,  p-value: < 2.2e-16
library(interactions)
library(colorspace)
q10 <- qualitative_hcl(10, "Dark2")
interact_plot(ntot_model, pred = bs, modx = Freq, modx.values = c(1:10), colors = q10, legend.main = "# of Enhancers") + theme_minimal(base_size = 18) +
        xlab("Burst Size") +
        ylab("Total Noise") +
        scale_x_log10()
## Using data kinetics_noise from global environment. This could cause
## incorrect results if kinetics_noise has been altered since the model was
## fit. You can manually provide the data to the "data =" argument.